#install.packages('readxl')
library(readxl)
#install.packages('tidyverse')
library(tidyverse)
#install.packages("fpp2")
library(fpp2)
#install.packages("forecast")
library(forecast)
#install.packages("ggplot2")
library(ggplot2)
#install.packages("openxlsx")
library(openxlsx)

# Read the raw workbook; the printed preview shows three columns: Timestamp
# (text), Power (kW) and Temp (C°).
# NOTE(review): the path is absolute and machine-specific — adjust it before
# running on another computer.
data <- read_excel('/Users/annabellenarsama/Desktop/SeriesTemporelles/electrain.xlsx')
print(data)Timestamp <chr> | Power (kW) <dbl> | Temp (C°) <dbl> | ||
|---|---|---|---|---|
| 40179.052083333336 | 165.1 | 10.555556 | ||
| 1/1/2010 1:30 | 151.6 | 10.555556 | ||
| 1/1/2010 1:45 | 146.9 | 10.555556 | ||
| 1/1/2010 2:00 | 153.7 | 10.555556 | ||
| 1/1/2010 2:15 | 153.8 | 10.555556 | ||
| 1/1/2010 2:30 | 159.0 | 10.555556 | ||
| 1/1/2010 2:45 | 157.7 | 10.555556 | ||
| 1/1/2010 3:00 | 163.2 | 10.555556 | ||
| 1/1/2010 3:15 | 151.7 | 10.000000 | ||
| 1/1/2010 3:30 | 148.7 | 10.000000 |
Pour des raisons de symétrie, nous enlevons les 91 premières lignes, qui correspondent au premier jour du mois (1er janvier 2010), auquel il manque les 5 premières valeurs (de 0:00 à 1:00).
# Drop the first 91 rows so that the retained data starts at the next row
# (printed below as 1/2/2010 0:00).
newdata <- data[-seq_len(91), ]
print(newdata)Timestamp <chr> | Power (kW) <dbl> | Temp (C°) <dbl> | ||
|---|---|---|---|---|
| 1/2/2010 0:00 | 163.1 | 13.333333 | ||
| 1/2/2010 0:15 | 154.4 | 10.555556 | ||
| 1/2/2010 0:30 | 152.2 | 10.555556 | ||
| 1/2/2010 0:45 | 158.7 | 10.555556 | ||
| 1/2/2010 1:00 | 163.8 | 10.555556 | ||
| 1/2/2010 1:15 | 158.7 | 10.000000 | ||
| 1/2/2010 1:30 | 152.3 | 10.000000 | ||
| 1/2/2010 1:45 | 155.2 | 10.000000 | ||
| 1/2/2010 2:00 | 155.9 | 10.000000 | ||
| 1/2/2010 2:15 | 152.1 | 10.000000 |
# Derive two helper columns from the text timestamp, parsed as
# month/day/year hour:minute.
# "jour": weekday name of each observation.
# NOTE(review): no tz is passed, so parsing presumably uses the session time
# zone and the weekday names the session locale — confirm.
newdata["jour"] <- weekdays(as.POSIXct(newdata$Timestamp, format="%m/%d/%Y %H:%M"))
# "heure": the parsed timestamp rendered with format()'s default layout. The
# printed table shows full date-times (e.g. "2010-01-02 00:15:00"), not only
# the hour of day.
newdata["heure"] <- format(strptime(newdata$Timestamp, format="%m/%d/%Y %H:%M"))
print(newdata)Timestamp <chr> | Power (kW) <dbl> | Temp (C°) <dbl> | jour <chr> | heure <chr> |
|---|---|---|---|---|
| 1/2/2010 0:00 | 163.1 | 13.333333 | Saturday | 2010-01-02 00:00:00 |
| 1/2/2010 0:15 | 154.4 | 10.555556 | Saturday | 2010-01-02 00:15:00 |
| 1/2/2010 0:30 | 152.2 | 10.555556 | Saturday | 2010-01-02 00:30:00 |
| 1/2/2010 0:45 | 158.7 | 10.555556 | Saturday | 2010-01-02 00:45:00 |
| 1/2/2010 1:00 | 163.8 | 10.555556 | Saturday | 2010-01-02 01:00:00 |
| 1/2/2010 1:15 | 158.7 | 10.000000 | Saturday | 2010-01-02 01:15:00 |
| 1/2/2010 1:30 | 152.3 | 10.000000 | Saturday | 2010-01-02 01:30:00 |
| 1/2/2010 1:45 | 155.2 | 10.000000 | Saturday | 2010-01-02 01:45:00 |
| 1/2/2010 2:00 | 155.9 | 10.000000 | Saturday | 2010-01-02 02:00:00 |
| 1/2/2010 2:15 | 152.1 | 10.000000 | Saturday | 2010-01-02 02:15:00 |
# Quarter-hourly power series: 96 observations per day, indexed as
# (day, quarter-hour of the day).
# The first row of newdata is 00:00 of its first day, so the series must start
# at position 1 of day 1. The previous start = c(1, 6) shifted every seasonal
# position by five quarter-hours.
# NOTE(review): `end` is hard-coded; when start and end are both given, ts()
# recycles or truncates the data to fit — confirm newdata really has
# 51 * 96 rows.
elec <- ts(
  newdata$`Power (kW)`,
  start = c(1, 1),
  end = c(51, 96),
  frequency = 96
)
print(elec)Time Series:
Start = c(1, 6)
End = c(51, 96)
Frequency = 96
[1] 163.1 154.4 152.2 158.7 163.8 158.7 152.3 155.2 155.9 152.1 154.1
[12] 155.9 156.8 153.9 152.2 165.6 168.8 160.5 160.6 161.1 160.7 157.0
[23] 161.7 158.4 165.8 166.4 168.0 159.5 164.2 170.3 178.8 181.5 182.4
[34] 270.9 269.4 273.1 268.3 277.7 269.8 268.0 258.3 260.7 257.2 256.3
[45] 255.0 270.9 269.6 307.6 283.5 266.1 295.9 278.5 269.5 297.5 294.4
[56] 300.7 287.5 288.4 288.4 283.9 310.4 285.3 288.9 277.1 280.3 271.8
[67] 301.8 282.2 285.0 326.2 311.7 313.4 305.0 300.7 303.2 299.8 307.7
[78] 315.8 304.1 305.6 290.9 289.4 285.2 276.6 282.0 291.6 286.9 290.1
[89] 285.1 285.7 286.6 281.9 281.3 190.3 194.8 163.7 159.3 158.4 152.9
[100] 159.6 161.6 159.9 149.8 148.7 153.2 150.6 151.6 160.6 165.7 154.9
[111] 149.0 163.5 161.2 158.6 154.9 162.0 162.7 160.3 162.3 168.3 165.3
[122] 166.8 166.7 172.6 171.1 175.5 185.4 188.8 186.6 267.0 267.3 262.6
[133] 258.2 262.0 259.4 256.6 258.2 262.8 264.7 262.9 263.8 270.7 264.2
[144] 269.3 265.9 265.7 261.7 270.9 280.0 281.0 278.9 276.5 276.0 273.1
[155] 270.2 276.3 265.0 266.2 267.3 275.4 273.0 273.0 272.5 266.3 284.0
[166] 315.0 313.9 314.4 312.9 310.0 310.1 310.1 317.6 312.9 308.0 306.9
[177] 305.5 297.2 293.6 300.0 292.8 288.1 285.8 286.9 283.8 193.6 191.7
[188] 158.9 169.1 161.4 166.9 165.0 164.1 168.5 155.4 166.7 161.3 163.1
[199] 152.5 147.0 154.6 161.0 158.9 159.3 165.4 160.9 151.5 157.2 166.1
[210] 161.9 161.4 167.9 162.2 166.7 163.5 164.0 170.2 166.3 177.2 174.5
[221] 168.1 189.4 188.9 176.8 181.7 268.0 261.1 258.5 261.7 264.2 258.3
[232] 266.4 267.4 265.7 265.2 264.2 266.3 262.8 262.4 265.5 270.7 261.6
[243] 272.9 275.8 271.0 276.3 274.0 276.1 278.1 274.5 276.6 271.6 271.9
[254] 272.8 271.0 271.9 277.7 267.8 266.1 269.5 279.1 312.1 314.1 309.7
[265] 310.2 302.1 301.4 304.5 305.6 312.9 306.0 308.3 305.1 296.7 299.9
[276] 298.2 300.2 300.4 283.8 277.6 286.2 288.7 293.2 288.2 286.5 191.9
[287] 198.2 170.3 162.5 160.2 146.1 159.8 165.4 154.7 156.3 154.1 160.8
[298] 157.0 155.9 161.3 161.0 153.8 143.3 154.8 163.6 161.8 160.7 160.6
[309] 157.2 154.1 162.3 161.4 156.6 161.7 164.4 178.8 184.2 178.9 184.6
[320] 174.9 180.3 272.6 263.5 257.3 262.8 269.4 266.7 262.9 265.7 263.7
[331] 269.7 265.9 274.2 273.6 268.6 268.4 265.5 269.2 271.1 273.5 277.8
[342] 270.9 273.0 269.8 274.5 267.4 273.3 264.5 265.5 267.1 272.7 268.4
[353] 271.0 268.6 266.1 270.9 279.3 312.1 308.1 305.1 302.3 305.0 305.9
[364] 298.4 304.0 309.3 302.2 302.2 311.6 308.7 305.6 303.5 303.0 300.4
[375] 299.3 299.8 287.9 288.7 291.7 290.3 288.6 193.7 198.5 167.3 162.3
[386] 160.2 148.8 154.0 164.0 168.6 153.6 148.7 159.4 151.4 155.0 154.7
[397] 158.6 156.2 149.8 155.3 159.4 160.1 155.2 161.5 159.8 157.4 161.1
[408] 162.0 160.0 163.4 166.2 170.5 175.0 184.7 198.3 181.2 192.8 274.0
[419] 269.9 258.8 261.8 268.8 264.5 268.5 266.6 261.6 269.2 256.9 260.2
[430] 269.4 269.5 267.9 266.6 272.3 271.9 265.1 257.4 260.7 261.0 265.4
[441] 274.3 273.9 265.8 269.7 264.4 271.3 267.3 258.4 265.7 272.1 268.7
[452] 266.7 272.3 308.3 310.2 308.3 310.1 309.3 303.7 303.0 304.4 313.9
[463] 307.8 310.6 301.8 308.0 304.7 305.8 309.5 305.5 303.7 302.5 293.2
[474] 284.6 286.5 283.0 283.2 187.2 194.6 156.2 161.1 155.5 155.0 153.2
[485] 159.6 164.4 151.6 143.0 147.9 155.2 157.7 152.9 158.5 146.4 145.2
[496] 156.3 154.7 159.3 159.2 160.4 157.6 153.8 152.7 158.6 155.5 153.8
[507] 163.4 168.4 168.0 184.1 193.5 172.9 182.9 275.5 265.7 259.1 262.5
[518] 265.5 258.0 261.1 268.9 259.0 256.4 264.4 268.2 275.9 270.9 268.6
[529] 264.5 263.5 270.2 269.6 274.1 274.3 270.8 274.5 274.9 273.7 276.2
[540] 266.0 270.8 270.8 265.7 270.9 266.7 271.3 268.3 267.3 276.5 311.1
[551] 310.5 313.6 308.9 308.5 308.1 304.5 302.4 309.7 306.4 303.1 301.2
[562] 305.8 300.5 298.1 304.0 302.7 299.0 301.2 302.5 287.9 286.4 284.6
[573] 287.1 194.4 202.0 167.0 164.6 161.4 156.2 162.8 161.8 166.0 153.5
[584] 157.0 162.6 159.1 167.3 161.6 162.3 155.0 152.0 161.7 160.3 155.9
[595] 160.3 161.3 159.1 164.8 167.0 170.9 170.4 173.8 178.8 172.0 168.1
[606] 177.1 180.6 185.5 172.6 258.1 267.0 265.9 262.4 269.3 264.2 263.1
[617] 265.0 259.6 262.9 270.5 267.2 268.9 266.7 268.5 271.7 271.5 274.8
[628] 272.3 269.7 271.3 270.6 274.4 275.5 269.6 266.4 264.2 266.6 270.1
[639] 270.1 274.5 275.4 271.3 271.8 271.1 280.2 319.5 317.1 317.7 311.3
[650] 306.1 307.3 308.4 308.9 313.0 307.2 305.2 304.9 306.3 301.1 298.9
[661] 299.9 296.6 295.4 303.8 299.3 282.4 283.3 279.9 285.1 191.7 190.5
[672] 157.8 157.6 159.3 144.0 157.1 157.5 162.7 149.7 146.9 160.3 153.2
[683] 156.7 154.7 154.7 151.9 144.3 151.7 156.7 150.6 154.9 150.0 153.6
[694] 151.5 158.5 161.5 153.4 158.8 154.9 160.9 164.0 184.9 188.1 173.3
[705] 172.4 268.0 263.1 260.0 260.0 260.8 255.7 264.3 267.3 261.0 260.9
[716] 261.7 265.1 267.2 261.0 269.9 263.8 268.2 269.3 269.3 266.6 263.9
[727] 264.2 270.4 270.2 262.8 263.3 270.3 272.2 270.4 267.1 275.2 270.5
[738] 273.3 266.6 261.7 266.5 310.3 306.8 307.1 316.8 310.0 306.0 307.1
[749] 308.4 309.3 307.2 306.4 308.5 304.3 301.2 300.0 303.5 304.6 292.1
[760] 283.4 284.1 281.0 284.8 287.8 293.5 196.9 193.8 168.3 164.3 160.6
[771] 152.8 152.6 160.8 154.4 155.4 151.9 149.4 145.5 146.3 148.8 147.8
[782] 148.4 153.5 149.3 147.0 156.0 157.7 159.7 153.0 153.2 155.5 161.0
[793] 160.6 158.5 167.0 161.2 162.6 175.0 193.2 180.5 180.2 261.1 256.5
[804] 256.6 254.6 258.2 257.3 261.4 254.6 252.1 260.3 264.1 265.1 271.1
[815] 267.5 261.3 268.3 260.2 267.4 270.7 265.2 269.5 268.9 269.4 270.8
[826] 270.0 273.4 271.3 268.0 276.4 273.4 270.2 273.3 266.8 270.1 272.0
[837] 272.0 310.5 314.4 314.0 312.9 300.6 307.2 303.8 307.5 313.8 306.3
[848] 298.1 302.4 304.1 306.9 307.0 306.6 293.1 294.3 291.3 287.6 199.3
[859] 199.4 163.0 165.6 161.7 166.4 166.8 169.9 170.3 160.4 163.9 173.1
[870] 169.6 162.9 154.9 159.2 150.8 168.0 163.9 168.6 152.0 158.0 168.4
[881] 169.7 164.1 163.1 167.4 168.6 166.4 173.7 172.1 175.2 171.5 183.2
[892] 199.1 190.8 190.1 185.9 185.2 184.1 269.8 264.6 262.6 263.7 263.9
[903] 258.2 259.2 266.0 264.5 270.7 268.3 269.4 264.3 267.7 267.0 273.4
[914] 276.2 272.9 272.3 270.2 269.1 273.1 271.2 267.1 271.1 266.2 272.9
[925] 269.3 265.0 261.7 267.1 264.8 267.3 263.5 264.1 263.5 307.0 311.0
[936] 308.4 311.6 304.2 304.3 295.3 293.3 302.4 304.1 302.5 301.3 298.8
[947] 303.2 300.3 298.3 296.6 292.7 296.8 291.8 290.1 288.7 287.3 285.6
[958] 189.2 199.0 163.9 160.0 164.6 152.9 165.8 160.7 154.1 162.7 161.1
[969] 159.1 160.4 161.0 161.9 163.4 160.2 156.1 160.8 160.3 164.3 164.7
[980] 161.5 164.1 165.5 171.3 176.7 168.5 170.0 178.3 180.0 179.2 187.0
[991] 200.6 185.5 190.1 282.3 275.1 266.5 273.4 283.3 273.8 278.7
[ reached getOption("max.print") -- omitted 3891 entries ]
autoplot(elec)On remarque un pic à 0 dans les données. Ce sont des valeurs à remplacer pour permettre la suite des analyses.
On remplace à vue d’oeil les valeurs nulles. En effet, on peut voir que ces valeurs sont sur un pic qui tourne autour de 150. On fixe donc ces valeurs à 150.
# Overwrite the zero power readings with 150, a value chosen by eye from the
# plot.
newdata$`Power (kW)` <- replace(
  newdata$`Power (kW)`,
  newdata$`Power (kW)` == 0,
  150
)
# Rebuild the series from the patched column (96 observations per day).
elec <- ts(
  newdata$`Power (kW)`,
  start = c(1, 1),
  end = c(51, 96),
  frequency = 96
)
print(elec)Time Series:
Start = c(1, 1)
End = c(51, 96)
Frequency = 96
[1] 163.1 154.4 152.2 158.7 163.8 158.7 152.3 155.2 155.9 152.1 154.1
[12] 155.9 156.8 153.9 152.2 165.6 168.8 160.5 160.6 161.1 160.7 157.0
[23] 161.7 158.4 165.8 166.4 168.0 159.5 164.2 170.3 178.8 181.5 182.4
[34] 270.9 269.4 273.1 268.3 277.7 269.8 268.0 258.3 260.7 257.2 256.3
[45] 255.0 270.9 269.6 307.6 283.5 266.1 295.9 278.5 269.5 297.5 294.4
[56] 300.7 287.5 288.4 288.4 283.9 310.4 285.3 288.9 277.1 280.3 271.8
[67] 301.8 282.2 285.0 326.2 311.7 313.4 305.0 300.7 303.2 299.8 307.7
[78] 315.8 304.1 305.6 290.9 289.4 285.2 276.6 282.0 291.6 286.9 290.1
[89] 285.1 285.7 286.6 281.9 281.3 190.3 194.8 163.7 159.3 158.4 152.9
[100] 159.6 161.6 159.9 149.8 148.7 153.2 150.6 151.6 160.6 165.7 154.9
[111] 149.0 163.5 161.2 158.6 154.9 162.0 162.7 160.3 162.3 168.3 165.3
[122] 166.8 166.7 172.6 171.1 175.5 185.4 188.8 186.6 267.0 267.3 262.6
[133] 258.2 262.0 259.4 256.6 258.2 262.8 264.7 262.9 263.8 270.7 264.2
[144] 269.3 265.9 265.7 261.7 270.9 280.0 281.0 278.9 276.5 276.0 273.1
[155] 270.2 276.3 265.0 266.2 267.3 275.4 273.0 273.0 272.5 266.3 284.0
[166] 315.0 313.9 314.4 312.9 310.0 310.1 310.1 317.6 312.9 308.0 306.9
[177] 305.5 297.2 293.6 300.0 292.8 288.1 285.8 286.9 283.8 193.6 191.7
[188] 158.9 169.1 161.4 166.9 165.0 164.1 168.5 155.4 166.7 161.3 163.1
[199] 152.5 147.0 154.6 161.0 158.9 159.3 165.4 160.9 151.5 157.2 166.1
[210] 161.9 161.4 167.9 162.2 166.7 163.5 164.0 170.2 166.3 177.2 174.5
[221] 168.1 189.4 188.9 176.8 181.7 268.0 261.1 258.5 261.7 264.2 258.3
[232] 266.4 267.4 265.7 265.2 264.2 266.3 262.8 262.4 265.5 270.7 261.6
[243] 272.9 275.8 271.0 276.3 274.0 276.1 278.1 274.5 276.6 271.6 271.9
[254] 272.8 271.0 271.9 277.7 267.8 266.1 269.5 279.1 312.1 314.1 309.7
[265] 310.2 302.1 301.4 304.5 305.6 312.9 306.0 308.3 305.1 296.7 299.9
[276] 298.2 300.2 300.4 283.8 277.6 286.2 288.7 293.2 288.2 286.5 191.9
[287] 198.2 170.3 162.5 160.2 146.1 159.8 165.4 154.7 156.3 154.1 160.8
[298] 157.0 155.9 161.3 161.0 153.8 143.3 154.8 163.6 161.8 160.7 160.6
[309] 157.2 154.1 162.3 161.4 156.6 161.7 164.4 178.8 184.2 178.9 184.6
[320] 174.9 180.3 272.6 263.5 257.3 262.8 269.4 266.7 262.9 265.7 263.7
[331] 269.7 265.9 274.2 273.6 268.6 268.4 265.5 269.2 271.1 273.5 277.8
[342] 270.9 273.0 269.8 274.5 267.4 273.3 264.5 265.5 267.1 272.7 268.4
[353] 271.0 268.6 266.1 270.9 279.3 312.1 308.1 305.1 302.3 305.0 305.9
[364] 298.4 304.0 309.3 302.2 302.2 311.6 308.7 305.6 303.5 303.0 300.4
[375] 299.3 299.8 287.9 288.7 291.7 290.3 288.6 193.7 198.5 167.3 162.3
[386] 160.2 148.8 154.0 164.0 168.6 153.6 148.7 159.4 151.4 155.0 154.7
[397] 158.6 156.2 149.8 155.3 159.4 160.1 155.2 161.5 159.8 157.4 161.1
[408] 162.0 160.0 163.4 166.2 170.5 175.0 184.7 198.3 181.2 192.8 274.0
[419] 269.9 258.8 261.8 268.8 264.5 268.5 266.6 261.6 269.2 256.9 260.2
[430] 269.4 269.5 267.9 266.6 272.3 271.9 265.1 257.4 260.7 261.0 265.4
[441] 274.3 273.9 265.8 269.7 264.4 271.3 267.3 258.4 265.7 272.1 268.7
[452] 266.7 272.3 308.3 310.2 308.3 310.1 309.3 303.7 303.0 304.4 313.9
[463] 307.8 310.6 301.8 308.0 304.7 305.8 309.5 305.5 303.7 302.5 293.2
[474] 284.6 286.5 283.0 283.2 187.2 194.6 156.2 161.1 155.5 155.0 153.2
[485] 159.6 164.4 151.6 143.0 147.9 155.2 157.7 152.9 158.5 146.4 145.2
[496] 156.3 154.7 159.3 159.2 160.4 157.6 153.8 152.7 158.6 155.5 153.8
[507] 163.4 168.4 168.0 184.1 193.5 172.9 182.9 275.5 265.7 259.1 262.5
[518] 265.5 258.0 261.1 268.9 259.0 256.4 264.4 268.2 275.9 270.9 268.6
[529] 264.5 263.5 270.2 269.6 274.1 274.3 270.8 274.5 274.9 273.7 276.2
[540] 266.0 270.8 270.8 265.7 270.9 266.7 271.3 268.3 267.3 276.5 311.1
[551] 310.5 313.6 308.9 308.5 308.1 304.5 302.4 309.7 306.4 303.1 301.2
[562] 305.8 300.5 298.1 304.0 302.7 299.0 301.2 302.5 287.9 286.4 284.6
[573] 287.1 194.4 202.0 167.0 164.6 161.4 156.2 162.8 161.8 166.0 153.5
[584] 157.0 162.6 159.1 167.3 161.6 162.3 155.0 152.0 161.7 160.3 155.9
[595] 160.3 161.3 159.1 164.8 167.0 170.9 170.4 173.8 178.8 172.0 168.1
[606] 177.1 180.6 185.5 172.6 258.1 267.0 265.9 262.4 269.3 264.2 263.1
[617] 265.0 259.6 262.9 270.5 267.2 268.9 266.7 268.5 271.7 271.5 274.8
[628] 272.3 269.7 271.3 270.6 274.4 275.5 269.6 266.4 264.2 266.6 270.1
[639] 270.1 274.5 275.4 271.3 271.8 271.1 280.2 319.5 317.1 317.7 311.3
[650] 306.1 307.3 308.4 308.9 313.0 307.2 305.2 304.9 306.3 301.1 298.9
[661] 299.9 296.6 295.4 303.8 299.3 282.4 283.3 279.9 285.1 191.7 190.5
[672] 157.8 157.6 159.3 144.0 157.1 157.5 162.7 149.7 146.9 160.3 153.2
[683] 156.7 154.7 154.7 151.9 144.3 151.7 156.7 150.6 154.9 150.0 153.6
[694] 151.5 158.5 161.5 153.4 158.8 154.9 160.9 164.0 184.9 188.1 173.3
[705] 172.4 268.0 263.1 260.0 260.0 260.8 255.7 264.3 267.3 261.0 260.9
[716] 261.7 265.1 267.2 261.0 269.9 263.8 268.2 269.3 269.3 266.6 263.9
[727] 264.2 270.4 270.2 262.8 263.3 270.3 272.2 270.4 267.1 275.2 270.5
[738] 273.3 266.6 261.7 266.5 310.3 306.8 307.1 316.8 310.0 306.0 307.1
[749] 308.4 309.3 307.2 306.4 308.5 304.3 301.2 300.0 303.5 304.6 292.1
[760] 283.4 284.1 281.0 284.8 287.8 293.5 196.9 193.8 168.3 164.3 160.6
[771] 152.8 152.6 160.8 154.4 155.4 151.9 149.4 145.5 146.3 148.8 147.8
[782] 148.4 153.5 149.3 147.0 156.0 157.7 159.7 153.0 153.2 155.5 161.0
[793] 160.6 158.5 167.0 161.2 162.6 175.0 193.2 180.5 180.2 261.1 256.5
[804] 256.6 254.6 258.2 257.3 261.4 254.6 252.1 260.3 264.1 265.1 271.1
[815] 267.5 261.3 268.3 260.2 267.4 270.7 265.2 269.5 268.9 269.4 270.8
[826] 270.0 273.4 271.3 268.0 276.4 273.4 270.2 273.3 266.8 270.1 272.0
[837] 272.0 310.5 314.4 314.0 312.9 300.6 307.2 303.8 307.5 313.8 306.3
[848] 298.1 302.4 304.1 306.9 307.0 306.6 293.1 294.3 291.3 287.6 199.3
[859] 199.4 163.0 165.6 161.7 166.4 166.8 169.9 170.3 160.4 163.9 173.1
[870] 169.6 162.9 154.9 159.2 150.8 168.0 163.9 168.6 152.0 158.0 168.4
[881] 169.7 164.1 163.1 167.4 168.6 166.4 173.7 172.1 175.2 171.5 183.2
[892] 199.1 190.8 190.1 185.9 185.2 184.1 269.8 264.6 262.6 263.7 263.9
[903] 258.2 259.2 266.0 264.5 270.7 268.3 269.4 264.3 267.7 267.0 273.4
[914] 276.2 272.9 272.3 270.2 269.1 273.1 271.2 267.1 271.1 266.2 272.9
[925] 269.3 265.0 261.7 267.1 264.8 267.3 263.5 264.1 263.5 307.0 311.0
[936] 308.4 311.6 304.2 304.3 295.3 293.3 302.4 304.1 302.5 301.3 298.8
[947] 303.2 300.3 298.3 296.6 292.7 296.8 291.8 290.1 288.7 287.3 285.6
[958] 189.2 199.0 163.9 160.0 164.6 152.9 165.8 160.7 154.1 162.7 161.1
[969] 159.1 160.4 161.0 161.9 163.4 160.2 156.1 160.8 160.3 164.3 164.7
[980] 161.5 164.1 165.5 171.3 176.7 168.5 170.0 178.3 180.0 179.2 187.0
[991] 200.6 185.5 190.1 282.3 275.1 266.5 273.4 283.3 273.8 278.7
[ reached getOption("max.print") -- omitted 3896 entries ]
autoplot(elec) # visualisation des valeurs remplacéesOn décompose la série.
autoplot(decompose(elec, type="additive"))Il ne semble pas y avoir de tendance. On se concentrera donc sur des modèles saisonniers.
On divise nos données en ensembles d’apprentissage et de test pour un rapport de 80/20. L’ensemble d’apprentissage commence ainsi le premier jour du jeu de données (2 Janvier 2010) à la première heure, et se termine le quarantième jour (10 Février 2010) à la dernière heure.
# Training window: days 1 to 40 inclusive, all 96 quarter-hours of each day.
train <- window(
  elec,
  start = c(1, 1),
  end = c(40, 96)
)
test <- window(elec, start=c(41,1), end=c(50,96))On affiche les 2 ensembles simultanément :
plot(train, xlim=c(1,52), ylim=c(100,380))
lines(test, lty=2)On lance un Lissage Exponentiel Simple, car la meilleure prédiction a priori est une constante.
Modélisation :
# Simple exponential smoothing: trend (beta) and seasonality (gamma) are
# switched off; alpha = NULL lets HoltWinters() estimate it.
LES <- HoltWinters(
  train,
  alpha = NULL,
  beta = FALSE,
  gamma = FALSE
)
print(LES)Holt-Winters exponential smoothing without trend and without seasonal component.
Call:
HoltWinters(x = train, alpha = NULL, beta = FALSE, gamma = FALSE)
Smoothing parameters:
alpha: 0.9860426
beta : FALSE
gamma: FALSE
Coefficients:
[,1]
a 156.3719
On prédit avec la constante trouvée précédemment (a ≈ 156,4). On constate que cette constante, qui correspond au niveau lissé de ‘Power’ à la fin de l’ensemble d’apprentissage (avec alpha ≈ 0,99, pratiquement la dernière valeur observée) et non à la moyenne de la série, est aux environs de 150. Notre valeur fixée a priori pour remplacer les valeurs nulles n’est donc pas absurde, bien que l’on puisse la remplacer par la valeur exacte calculée par ce modèle.
Prédiction :
# Forecast 960 steps ahead (10 days x 96 quarter-hours), i.e. the 10 days that
# follow the training window, then draw the held-out test series as the base
# plot.
pred1 <- predict(LES, n.ahead = 960)
plot(test)
lines(pred1, col=2) # prédiction à partir du train setCoefficient de la constante du Lissage Exponentiel Simple :
print(LES$alpha) # 0.9860426[1] 0.9860426
Évaluation - RMSE du Lissage Exponentiel Simple :
print(sqrt(mean((pred1-test)^2))) # 93.28624[1] 93.28624
On lance un Holt Winters saisonnier avec une constante alpha et une saisonnalité gamma.
Modélisation :
# Seasonal Holt-Winters without trend: alpha and gamma are estimated
# (NULL), beta is disabled.
HW <- HoltWinters(
  train,
  alpha = NULL,
  beta = FALSE,
  gamma = NULL
)
print(HW)Holt-Winters exponential smoothing without trend and with additive seasonal component.
Call:
HoltWinters(x = train, alpha = NULL, beta = FALSE, gamma = NULL)
Smoothing parameters:
alpha: 0.7831196
beta : FALSE
gamma: 0.8904545
Coefficients:
[,1]
a 240.11894
s1 -85.53653
s2 -81.59209
s3 -84.89480
s4 -78.20116
s5 -72.88999
s6 -71.68576
s7 -85.21335
s8 -83.36854
s9 -79.97476
s10 -79.19373
s11 -78.56924
s12 -83.33520
s13 -85.83806
s14 -89.88809
s15 -88.66898
s16 -83.77997
s17 -81.77421
s18 -82.99303
s19 -81.97842
s20 -79.56752
s21 -78.70675
s22 -78.71416
s23 -80.33166
s24 -76.59747
s25 -75.18980
s26 -68.89655
s27 -57.44692
s28 -56.74519
s29 -60.28571
s30 -61.69323
s31 -70.64407
s32 -63.30628
s33 -58.80193
s34 23.24709
s35 24.12167
s36 20.54998
s37 22.51192
s38 28.38224
s39 22.49509
s40 23.29417
s41 26.44203
s42 22.38805
s43 24.39604
s44 26.29052
s45 24.03414
s46 25.34776
s47 24.20117
s48 22.40583
s49 24.49166
s50 27.72696
s51 35.45720
s52 37.98888
s53 30.40060
s54 34.20281
s55 35.84587
s56 35.46848
s57 35.39524
s58 38.46045
s59 41.43965
s60 42.53769
s61 41.54371
s62 44.69122
s63 41.75707
s64 46.65363
s65 45.10486
s66 47.29578
s67 49.76163
s68 48.48894
s69 43.75145
s70 47.81166
s71 52.60843
s72 105.88779
s73 108.53680
s74 103.49910
s75 95.49748
s76 89.73647
s77 84.74433
s78 83.48915
s79 74.42472
s80 73.32390
s81 70.05516
s82 67.09232
s83 60.54797
s84 60.42753
s85 59.10355
s86 55.27885
s87 52.05517
s88 51.74057
s89 47.41802
s90 29.00408
s91 25.76486
s92 18.88309
s93 17.50113
s94 -57.21982
s95 -55.69980
s96 -84.05392
On voit bien les 96 périodes de la saisonnalité.
Prédiction :
# Forecast 960 steps ahead (10 days x 96 quarter-hours) with the seasonal
# model, then draw the held-out test series as the base plot.
pred2 <- predict(HW, n.ahead = 960)
plot(test)
lines(pred2, col=3) # prédiction à partir du train setA priori, ce modèle n’est pas si mauvais.
Coefficients de la constante et de la saisonnalité du Holt Winters :
print(HW$alpha) # 0.7831196 alpha
0.7831196
print(HW$gamma) # 0.8904545 gamma
0.8904545
Évaluation - RMSE du Holt Winters saisonnier :
print(sqrt(mean((pred2-test)^2))) # 21.26563[1] 21.26563
Après avoir lancé les modèles a priori, on continue avec un auto-ARIMA pour trouver le meilleur modèle théorique.
Modélisation :
# Let auto.arima() choose the ARIMA specification for the training series.
model3 <- auto.arima(train)
install.packages("openxlsx")trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.3/openxlsx_4.2.5.2.tgz'
Content type 'application/x-gzip' length 3246918 bytes (3.1 MB)
==================================================
downloaded 3.1 MB
The downloaded binary packages are in
/var/folders/mk/hc7kkglj5fncc3271_yfcf6r0000gn/T//RtmpUrPkjh/downloaded_packages
summary(model3) # AIC : 27696.77Series: train
ARIMA(1,0,0)(0,1,0)[96]
Coefficients:
ar1
0.7815
s.e. 0.0102
sigma^2 = 95.46: log likelihood = -13846.39
AIC=27696.77 AICc=27696.77 BIC=27709.23
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -0.06254446 9.646271 5.620404 -0.1185649 2.611593 0.7134771
ACF1
Training set 0.0003086627
L’auto-ARIMA nous donne un SARIMA$(1,0,0)(0,1,0)_{96}$, c’est-à-dire un AR(1) avec une différenciation saisonnière d’ordre 1 et de période 96.
Prédiction :
pred3 = forecast(model3, h=960)RMSE de l’ARIMA :
print(sqrt(mean((pred3$mean-test)^2))) # 15.71738[1] 15.71738
Notre modèle généré par l’auto-ARIMA est le meilleur jusqu’ici des 3 créés. Mais nous devons maintenant nous assurer que les résidus de la série sont indépendants du passé.
checkresiduals(model3)
Ljung-Box test
data: Residuals from ARIMA(1,0,0)(0,1,0)[96]
Q* = 1522.8, df = 191, p-value < 2.2e-16
Model df: 1. Total lags used: 192
Les résidus ne sont donc pas indépendants. Nous devons par conséquent différencier la série afin d’extraire les résidus et les rendre indépendants.
Il faudrait supprimer la saisonnalité de la série temporelle afin de pouvoir lancer un SARIMA. Pour cela, il faudrait différencier la série avec un lag spécifique. Il faudrait également calculer les auto-corrélations (ggAcf) et les auto-corrélations partielles (ggPacf) pour trouver l’ordre du SARIMA. Il faudrait également s’assurer de l’indépendance des résidus avec un box-test : les résidus sont-ils du bruit blanc ?
# Seasonal difference of the training series: lag 96 is one full day of
# quarter-hours.
tmp <- diff(train, lag = 96)
plot(tmp)ggAcf(tmp)ggPacf(tmp)On constate que les résidus ne sont pas indépendants après une différenciation de la série.
# Difference the already seasonally-differenced series again, this time at
# lag 192 (two days of quarter-hours).
# NOTE(review): a second difference at twice the seasonal period is unusual —
# confirm lag=192 is intended rather than lag=96 or lag=1.
tmp1 = diff(tmp, lag=192)
plot(tmp1)ggAcf(tmp1)ggPacf(tmp1)Après avoir rendu les résidus de la série indépendants, nous pouvons lancer un SARIMA avec les paramètres trouvés précédemment, ainsi que d’autres modèles.
Modélisation :
# Neural-network autoregression fitted on the training series with nnetar()'s
# default settings.
model4 <- nnetar(train)
print(model4)Series: train
Model: NNAR(20,1,11)[96]
Call: nnetar(y = train)
Average of 20 networks, each of which is
a 21-11-1 network with 254 weights
options were - linear output units
sigma^2 estimated as 46.92
Prédiction :
pred4 = forecast(model4, h=960)Évaluation :
print(sqrt(mean((pred4$mean-test)^2)))[1] 68.82039
Les réseaux de neurones ne sont pas très bons, probablement parce que notre série n’est pas stationnaire.
Nous insérons un graphique qui affiche simultanément chaque prédiction.
# Overlay every model's forecast on the held-out test window, one statement
# per line (several calls were previously fused on the same line, which is
# not valid R).
par(mfrow = c(1, 1))
plot(test, xlim = c(41, 51), ylim = c(120, 700))
lines(test, lty = 2)
lines(pred1, col = 2)
lines(pred2, col = 3)
lines(pred3$mean, col = 4)
lines(pred4$mean, col = 5)
# Legend colours 1 to 5 follow the drawing order above: test data, then the
# four forecasts.
legend('topleft',
       col = 1:5,
       lty = 1,
       legend = c('Vraies Données',
                  'Prédictions avec LES',
                  'Prédictions avec HW',
                  'ARIMA',
                  'Réseaux de Neurones'))

# Refit the specification selected by auto.arima(), AR(1) with one seasonal
# difference, on the full series.
SAR <- Arima(elec, order = c(1, 0, 0), seasonal = c(0, 1, 0))
summary(SAR)Series: elec
ARIMA(1,0,0)(0,1,0)[96]
Coefficients:
ar1
0.7164
s.e. 0.0102
sigma^2 = 128.3: log likelihood = -18092.79
AIC=36189.58 AICc=36189.58 BIC=36202.49
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -0.1144681 11.21379 6.400623 -0.1672996 2.887978 0.7451869
ACF1
Training set -0.0708581
# Forecast one more day (96 quarter-hours) with the refitted model and draw it
# after the observed series.
pred <- forecast(SAR, h = 96)
autoplot(elec) +
  autolayer(pred)
#predictions <- as.numeric(pred$mean)
#pred_df <- data.frame(Prediction = predictions)Ici, nous tentons de prédire la consommation d’électricité avec la température comme covariable.
# Power and temperature as aligned quarter-hourly series (96 observations per
# day).
# The first row of newdata is 00:00 of its first day, so both series must
# start at position 1 of day 1. With the previous start = c(1, 6), the
# window(start = c(1, 1)) calls below fell before the series start and were
# clipped, leaving 3835 training observations instead of 40 * 96 = 3840 and
# shifting the `season` dummies used by tslm().
# NOTE(review): `end` is hard-coded; when start and end are both given, ts()
# recycles or truncates the data to fit — confirm newdata really has
# 51 * 96 rows.
power <- ts(
  newdata$`Power (kW)`,
  start = c(1, 1),
  end = c(51, 96),
  frequency = 96
)
temperature <- ts(
  newdata$`Temp (C°)`,
  start = c(1, 1),
  end = c(51, 96),
  frequency = 96
)

# Same split as for the univariate models: days 1-40 for training, days 41-50
# for testing.
power_train <- window(power, start = c(1, 1), end = c(40, 96))
power_test <- window(power, start = c(41, 1), end = c(50, 96))
temperature_train <- window(temperature, start = c(1, 1), end = c(40, 96))
temperature_test <- window(temperature, start=c(41,1), end=c(50,96))Nous lançons des modèles a priori pour voir le lien entre les 2 variables.
# Baseline regression: power explained by temperature alone.
fit1 <- tslm(power_train ~ temperature_train)
summary(fit1)
Call:
tslm(formula = power_train ~ temperature_train)
Residuals:
Min 1Q Median 3Q Max
-121.545 -42.092 2.155 43.037 111.973
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 126.0818 3.4186 36.88 <2e-16 ***
temperature_train 9.9478 0.3137 31.71 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 50.83 on 3833 degrees of freedom
Multiple R-squared: 0.2078, Adjusted R-squared: 0.2076
F-statistic: 1005 on 1 and 3833 DF, p-value: < 2.2e-16
# Regression on temperature plus tslm()'s built-in seasonal dummies and linear
# trend.
fit2 <- tslm(power_train ~ temperature_train + season + trend)
summary(fit2)
Call:
tslm(formula = power_train ~ temperature_train + season + trend)
Residuals:
Min 1Q Median 3Q Max
-112.032 -4.617 0.248 4.591 58.331
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.611e+02 2.092e+00 124.776 < 2e-16 ***
temperature_train 1.039e+00 9.814e-02 10.589 < 2e-16 ***
season2 1.107e+00 2.622e+00 0.422 0.67291
season3 -7.446e+01 2.623e+00 -28.392 < 2e-16 ***
season4 -7.221e+01 2.623e+00 -27.533 < 2e-16 ***
season5 -1.013e+02 2.623e+00 -38.632 < 2e-16 ***
season6 -1.044e+02 2.606e+00 -40.074 < 2e-16 ***
season7 -1.050e+02 2.607e+00 -40.273 < 2e-16 ***
season8 -1.111e+02 2.607e+00 -42.637 < 2e-16 ***
season9 -1.048e+02 2.607e+00 -40.206 < 2e-16 ***
season10 -1.016e+02 2.607e+00 -38.978 < 2e-16 ***
season11 -1.012e+02 2.607e+00 -38.801 < 2e-16 ***
season12 -1.108e+02 2.607e+00 -42.478 < 2e-16 ***
season13 -1.109e+02 2.607e+00 -42.526 < 2e-16 ***
season14 -1.075e+02 2.607e+00 -41.237 < 2e-16 ***
season15 -1.083e+02 2.608e+00 -41.515 < 2e-16 ***
season16 -1.055e+02 2.608e+00 -40.448 < 2e-16 ***
season17 -1.063e+02 2.608e+00 -40.754 < 2e-16 ***
season18 -1.048e+02 2.608e+00 -40.180 < 2e-16 ***
season19 -1.093e+02 2.609e+00 -41.915 < 2e-16 ***
season20 -1.107e+02 2.609e+00 -42.453 < 2e-16 ***
season21 -1.052e+02 2.609e+00 -40.317 < 2e-16 ***
season22 -1.036e+02 2.609e+00 -39.729 < 2e-16 ***
season23 -1.046e+02 2.610e+00 -40.084 < 2e-16 ***
season24 -1.056e+02 2.610e+00 -40.447 < 2e-16 ***
season25 -1.039e+02 2.610e+00 -39.824 < 2e-16 ***
season26 -1.041e+02 2.610e+00 -39.901 < 2e-16 ***
season27 -1.032e+02 2.611e+00 -39.523 < 2e-16 ***
season28 -1.022e+02 2.611e+00 -39.139 < 2e-16 ***
season29 -1.002e+02 2.611e+00 -38.380 < 2e-16 ***
season30 -1.008e+02 2.611e+00 -38.625 < 2e-16 ***
season31 -9.727e+01 2.611e+00 -37.255 < 2e-16 ***
season32 -9.022e+01 2.611e+00 -34.552 < 2e-16 ***
season33 -8.799e+01 2.611e+00 -33.698 < 2e-16 ***
season34 -8.752e+01 2.611e+00 -33.520 < 2e-16 ***
season35 -8.208e+01 2.611e+00 -31.435 < 2e-16 ***
season36 -8.259e+01 2.611e+00 -31.628 < 2e-16 ***
season37 -8.801e+01 2.611e+00 -33.707 < 2e-16 ***
season38 -8.436e+01 2.611e+00 -32.309 < 2e-16 ***
season39 -3.692e-01 2.611e+00 -0.141 0.88754
season40 -2.332e+00 2.611e+00 -0.893 0.37177
season41 -5.300e+00 2.611e+00 -2.030 0.04242 *
season42 -5.585e+00 2.611e+00 -2.139 0.03247 *
season43 -2.926e+00 2.606e+00 -1.123 0.26159
season44 -7.947e+00 2.606e+00 -3.049 0.00231 **
season45 -5.467e+00 2.606e+00 -2.098 0.03600 *
season46 -4.552e+00 2.606e+00 -1.747 0.08077 .
season47 -8.393e+00 2.607e+00 -3.220 0.00129 **
season48 -7.586e+00 2.607e+00 -2.910 0.00364 **
season49 -5.989e+00 2.607e+00 -2.297 0.02166 *
season50 -5.814e+00 2.607e+00 -2.230 0.02579 *
season51 -3.773e+00 2.611e+00 -1.445 0.14846
season52 -5.596e+00 2.611e+00 -2.144 0.03214 *
season53 -5.069e+00 2.611e+00 -1.942 0.05226 .
season54 -4.369e+00 2.611e+00 -1.674 0.09429 .
season55 -4.660e+00 2.618e+00 -1.780 0.07508 .
season56 -1.243e+00 2.618e+00 -0.475 0.63483
season57 -2.054e+00 2.617e+00 -0.785 0.43275
season58 -3.919e+00 2.617e+00 -1.497 0.13442
season59 -2.328e+00 2.623e+00 -0.888 0.37486
season60 -1.916e+00 2.623e+00 -0.730 0.46520
season61 -2.651e+00 2.623e+00 -1.011 0.31220
season62 -3.351e+00 2.623e+00 -1.278 0.20140
season63 -3.150e+00 2.628e+00 -1.199 0.23062
season64 -2.996e+00 2.628e+00 -1.140 0.25431
season65 -3.221e+00 2.628e+00 -1.226 0.22032
season66 -3.431e+00 2.628e+00 -1.306 0.19165
season67 -3.417e+00 2.628e+00 -1.300 0.19364
season68 -4.117e+00 2.628e+00 -1.567 0.11729
season69 -4.015e+00 2.628e+00 -1.528 0.12666
season70 -4.161e+00 2.628e+00 -1.583 0.11349
season71 -3.996e+00 2.623e+00 -1.523 0.12781
season72 -3.411e+00 2.623e+00 -1.300 0.19359
season73 -5.951e+00 2.623e+00 -2.269 0.02335 *
season74 -6.469e+00 2.623e+00 -2.466 0.01370 *
season75 1.426e+01 2.616e+00 5.450 5.35e-08 ***
season76 2.826e+01 2.616e+00 10.803 < 2e-16 ***
season77 4.055e+01 2.616e+00 15.504 < 2e-16 ***
season78 4.002e+01 2.616e+00 15.299 < 2e-16 ***
season79 3.773e+01 2.610e+00 14.457 < 2e-16 ***
season80 3.584e+01 2.610e+00 13.733 < 2e-16 ***
season81 3.467e+01 2.610e+00 13.284 < 2e-16 ***
season82 3.599e+01 2.610e+00 13.789 < 2e-16 ***
season83 4.104e+01 2.608e+00 15.734 < 2e-16 ***
season84 3.676e+01 2.608e+00 14.094 < 2e-16 ***
season85 3.645e+01 2.608e+00 13.974 < 2e-16 ***
season86 3.445e+01 2.608e+00 13.210 < 2e-16 ***
season87 3.413e+01 2.607e+00 13.091 < 2e-16 ***
season88 3.258e+01 2.607e+00 12.497 < 2e-16 ***
season89 3.196e+01 2.607e+00 12.259 < 2e-16 ***
season90 3.130e+01 2.607e+00 12.006 < 2e-16 ***
season91 2.991e+01 2.606e+00 11.476 < 2e-16 ***
season92 2.785e+01 2.606e+00 10.687 < 2e-16 ***
season93 2.742e+01 2.606e+00 10.520 < 2e-16 ***
season94 2.546e+01 2.606e+00 9.768 < 2e-16 ***
season95 7.612e+00 2.606e+00 2.921 0.00351 **
season96 6.379e+00 2.606e+00 2.448 0.01441 *
trend -4.632e-03 1.737e-04 -26.664 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 11.58 on 3737 degrees of freedom
Multiple R-squared: 0.9599, Adjusted R-squared: 0.9589
F-statistic: 922.4 on 97 and 3737 DF, p-value: < 2.2e-16
Validation croisée des 2 régressions linéaires précédentes :
CV(fit1) CV AIC AICc BIC AdjR2
2.584402e+03 3.013516e+04 3.013516e+04 3.015391e+04 2.075976e-01
CV(fit2) # BIC meilleur CV AIC AICc BIC AdjR2
1.376823e+02 1.888504e+04 1.889034e+04 1.950398e+04 9.588661e-01
La deuxième a le meilleur BIC. On préfèrera donc le modèle de régression linéaire avec tendance et saisonnalité.
Check des résidus :
checkresiduals(fit2, test=FALSE, plot=TRUE)checkresiduals(fit2, test='LB', plot=FALSE)
Ljung-Box test
data: Residuals from Linear regression model
Q* = 8638.4, df = 192, p-value < 2.2e-16
Model df: 0. Total lags used: 192
ggAcf(fit2$residuals)ggPacf(fit2$residuals)Les résidus ne sont pas indépendants. Il faudrait donc les extraire et obtenir une série stationnaire à l’aide d’une méthode de différenciation et de vérification des résidus.
# Regression on temperature with ARIMA(1,0,0)(0,1,0) errors.
fit3 <- Arima(
  power_train,
  xreg = temperature_train,
  order = c(1, 0, 0),
  seasonal = c(0, 1, 0)
)
summary(fit3)Series: power_train
Regression with ARIMA(1,0,0)(0,1,0)[96] errors
Coefficients:
ar1 xreg
0.7798 0.3065
s.e. 0.0103 0.2406
sigma^2 = 95.55: log likelihood = -13829.12
AIC=27664.24 AICc=27664.24 BIC=27682.92
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -0.05764918 9.649274 5.619845 -0.1166433 2.610719 0.7128255
ACF1
Training set 0.0006469121
Check des résidus :
checkresiduals(fit3, test=FALSE)checkresiduals(fit3, plot=FALSE)
Ljung-Box test
data: Residuals from Regression with ARIMA(1,0,0)(0,1,0)[96] errors
Q* = 1522.2, df = 191, p-value < 2.2e-16
Model df: 1. Total lags used: 192
# Neural-network autoregression on power with temperature as an external
# regressor.
fit4 <- nnetar(power_train, xreg = temperature_train)
print(fit4)Series: power_train
Model: NNAR(20,1,12)[96]
Call: nnetar(y = power_train, xreg = temperature_train)
Average of 20 networks, each of which is
a 22-12-1 network with 289 weights
options were - linear output units
sigma^2 estimated as 46.35
# Plot the result of calling forecast() with its defaults on the `train`
# series.
# NOTE(review): this uses `train`, not the covariate models fitted just above
# (fit3 / fit4) — confirm that this is the intended final plot.
autoplot(forecast(train))